import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
data = pd.read_csv('WA_Fn-UseC_-HR-Employee-Attrition.csv')
data.head()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | Travel_Rarely | 1102 | Sales | 1 | 2 | Life Sciences | 1 | 1 | ... | 1 | 80 | 0 | 8 | 0 | 1 | 6 | 4 | 0 | 5 |
| 1 | 49 | No | Travel_Frequently | 279 | Research & Development | 8 | 1 | Life Sciences | 1 | 2 | ... | 4 | 80 | 1 | 10 | 3 | 3 | 10 | 7 | 1 | 7 |
| 2 | 37 | Yes | Travel_Rarely | 1373 | Research & Development | 2 | 2 | Other | 1 | 4 | ... | 2 | 80 | 0 | 7 | 3 | 3 | 0 | 0 | 0 | 0 |
| 3 | 33 | No | Travel_Frequently | 1392 | Research & Development | 3 | 4 | Life Sciences | 1 | 5 | ... | 3 | 80 | 0 | 8 | 3 | 3 | 8 | 7 | 3 | 0 |
| 4 | 27 | No | Travel_Rarely | 591 | Research & Development | 2 | 1 | Medical | 1 | 7 | ... | 4 | 80 | 1 | 6 | 3 | 3 | 2 | 2 | 2 | 2 |
5 rows × 35 columns
data.tail()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1465 | 36 | No | Travel_Frequently | 884 | Research & Development | 23 | 2 | Medical | 1 | 2061 | ... | 3 | 80 | 1 | 17 | 3 | 3 | 5 | 2 | 0 | 3 |
| 1466 | 39 | No | Travel_Rarely | 613 | Research & Development | 6 | 1 | Medical | 1 | 2062 | ... | 1 | 80 | 1 | 9 | 5 | 3 | 7 | 7 | 1 | 7 |
| 1467 | 27 | No | Travel_Rarely | 155 | Research & Development | 4 | 3 | Life Sciences | 1 | 2064 | ... | 2 | 80 | 1 | 6 | 0 | 3 | 6 | 2 | 0 | 3 |
| 1468 | 49 | No | Travel_Frequently | 1023 | Sales | 2 | 3 | Medical | 1 | 2065 | ... | 4 | 80 | 0 | 17 | 3 | 2 | 9 | 6 | 0 | 8 |
| 1469 | 34 | No | Travel_Rarely | 628 | Research & Development | 8 | 3 | Medical | 1 | 2068 | ... | 1 | 80 | 0 | 6 | 3 | 4 | 4 | 3 | 1 | 2 |
5 rows × 35 columns
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1470 entries, 0 to 1469 Data columns (total 35 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 1470 non-null int64 1 Attrition 1470 non-null object 2 BusinessTravel 1470 non-null object 3 DailyRate 1470 non-null int64 4 Department 1470 non-null object 5 DistanceFromHome 1470 non-null int64 6 Education 1470 non-null int64 7 EducationField 1470 non-null object 8 EmployeeCount 1470 non-null int64 9 EmployeeNumber 1470 non-null int64 10 EnvironmentSatisfaction 1470 non-null int64 11 Gender 1470 non-null object 12 HourlyRate 1470 non-null int64 13 JobInvolvement 1470 non-null int64 14 JobLevel 1470 non-null int64 15 JobRole 1470 non-null object 16 JobSatisfaction 1470 non-null int64 17 MaritalStatus 1470 non-null object 18 MonthlyIncome 1470 non-null int64 19 MonthlyRate 1470 non-null int64 20 NumCompaniesWorked 1470 non-null int64 21 Over18 1470 non-null object 22 OverTime 1470 non-null object 23 PercentSalaryHike 1470 non-null int64 24 PerformanceRating 1470 non-null int64 25 RelationshipSatisfaction 1470 non-null int64 26 StandardHours 1470 non-null int64 27 StockOptionLevel 1470 non-null int64 28 TotalWorkingYears 1470 non-null int64 29 TrainingTimesLastYear 1470 non-null int64 30 WorkLifeBalance 1470 non-null int64 31 YearsAtCompany 1470 non-null int64 32 YearsInCurrentRole 1470 non-null int64 33 YearsSinceLastPromotion 1470 non-null int64 34 YearsWithCurrManager 1470 non-null int64 dtypes: int64(26), object(9) memory usage: 402.1+ KB
data.describe()
| Age | DailyRate | DistanceFromHome | Education | EmployeeCount | EmployeeNumber | EnvironmentSatisfaction | HourlyRate | JobInvolvement | JobLevel | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | ... | 1470.000000 | 1470.0 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 | 1470.000000 |
| mean | 36.923810 | 802.485714 | 9.192517 | 2.912925 | 1.0 | 1024.865306 | 2.721769 | 65.891156 | 2.729932 | 2.063946 | ... | 2.712245 | 80.0 | 0.793878 | 11.279592 | 2.799320 | 2.761224 | 7.008163 | 4.229252 | 2.187755 | 4.123129 |
| std | 9.135373 | 403.509100 | 8.106864 | 1.024165 | 0.0 | 602.024335 | 1.093082 | 20.329428 | 0.711561 | 1.106940 | ... | 1.081209 | 0.0 | 0.852077 | 7.780782 | 1.289271 | 0.706476 | 6.126525 | 3.623137 | 3.222430 | 3.568136 |
| min | 18.000000 | 102.000000 | 1.000000 | 1.000000 | 1.0 | 1.000000 | 1.000000 | 30.000000 | 1.000000 | 1.000000 | ... | 1.000000 | 80.0 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 30.000000 | 465.000000 | 2.000000 | 2.000000 | 1.0 | 491.250000 | 2.000000 | 48.000000 | 2.000000 | 1.000000 | ... | 2.000000 | 80.0 | 0.000000 | 6.000000 | 2.000000 | 2.000000 | 3.000000 | 2.000000 | 0.000000 | 2.000000 |
| 50% | 36.000000 | 802.000000 | 7.000000 | 3.000000 | 1.0 | 1020.500000 | 3.000000 | 66.000000 | 3.000000 | 2.000000 | ... | 3.000000 | 80.0 | 1.000000 | 10.000000 | 3.000000 | 3.000000 | 5.000000 | 3.000000 | 1.000000 | 3.000000 |
| 75% | 43.000000 | 1157.000000 | 14.000000 | 4.000000 | 1.0 | 1555.750000 | 4.000000 | 83.750000 | 3.000000 | 3.000000 | ... | 4.000000 | 80.0 | 1.000000 | 15.000000 | 3.000000 | 3.000000 | 9.000000 | 7.000000 | 3.000000 | 7.000000 |
| max | 60.000000 | 1499.000000 | 29.000000 | 5.000000 | 1.0 | 2068.000000 | 4.000000 | 100.000000 | 4.000000 | 5.000000 | ... | 4.000000 | 80.0 | 3.000000 | 40.000000 | 6.000000 | 4.000000 | 40.000000 | 18.000000 | 15.000000 | 17.000000 |
8 rows × 26 columns
data.isnull().sum()
Age 0 Attrition 0 BusinessTravel 0 DailyRate 0 Department 0 DistanceFromHome 0 Education 0 EducationField 0 EmployeeCount 0 EmployeeNumber 0 EnvironmentSatisfaction 0 Gender 0 HourlyRate 0 JobInvolvement 0 JobLevel 0 JobRole 0 JobSatisfaction 0 MaritalStatus 0 MonthlyIncome 0 MonthlyRate 0 NumCompaniesWorked 0 Over18 0 OverTime 0 PercentSalaryHike 0 PerformanceRating 0 RelationshipSatisfaction 0 StandardHours 0 StockOptionLevel 0 TotalWorkingYears 0 TrainingTimesLastYear 0 WorkLifeBalance 0 YearsAtCompany 0 YearsInCurrentRole 0 YearsSinceLastPromotion 0 YearsWithCurrManager 0 dtype: int64
cor = data.corr()
<ipython-input-57-06847dd9a2e1>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. cor = data.corr()
fig, ax = plt.subplots(figsize=(25,25))
sns.heatmap(cor, annot=True)
<Axes: >
sns.pairplot(data)
<seaborn.axisgrid.PairGrid at 0x7b7a7577e530>
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()
data["BusinessTravel"]=le.fit_transform(data["BusinessTravel"])
data["Department"]=le.fit_transform(data["Department"])
data["EducationField"]=le.fit_transform(data["EducationField"])
data["Gender"]=le.fit_transform(data["Gender"])
data["JobRole"]=le.fit_transform(data["JobRole"])
data["MaritalStatus"]=le.fit_transform(data["MaritalStatus"])
data["Over18"]=le.fit_transform(data["Over18"])
data["OverTime"]=le.fit_transform(data["OverTime"])
data.head()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 41 | Yes | 2 | 1102 | 2 | 1 | 2 | 1 | 1 | 1 | ... | 1 | 80 | 0 | 8 | 0 | 1 | 6 | 4 | 0 | 5 |
| 1 | 49 | No | 1 | 279 | 1 | 8 | 1 | 1 | 1 | 2 | ... | 4 | 80 | 1 | 10 | 3 | 3 | 10 | 7 | 1 | 7 |
| 2 | 37 | Yes | 2 | 1373 | 1 | 2 | 2 | 4 | 1 | 4 | ... | 2 | 80 | 0 | 7 | 3 | 3 | 0 | 0 | 0 | 0 |
| 3 | 33 | No | 1 | 1392 | 1 | 3 | 4 | 1 | 1 | 5 | ... | 3 | 80 | 0 | 8 | 3 | 3 | 8 | 7 | 3 | 0 |
| 4 | 27 | No | 2 | 591 | 1 | 2 | 1 | 3 | 1 | 7 | ... | 4 | 80 | 1 | 6 | 3 | 3 | 2 | 2 | 2 | 2 |
5 rows × 35 columns
data.tail()
| Age | Attrition | BusinessTravel | DailyRate | Department | DistanceFromHome | Education | EducationField | EmployeeCount | EmployeeNumber | ... | RelationshipSatisfaction | StandardHours | StockOptionLevel | TotalWorkingYears | TrainingTimesLastYear | WorkLifeBalance | YearsAtCompany | YearsInCurrentRole | YearsSinceLastPromotion | YearsWithCurrManager | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1465 | 36 | No | 1 | 884 | 1 | 23 | 2 | 3 | 1 | 2061 | ... | 3 | 80 | 1 | 17 | 3 | 3 | 5 | 2 | 0 | 3 |
| 1466 | 39 | No | 2 | 613 | 1 | 6 | 1 | 3 | 1 | 2062 | ... | 1 | 80 | 1 | 9 | 5 | 3 | 7 | 7 | 1 | 7 |
| 1467 | 27 | No | 2 | 155 | 1 | 4 | 3 | 1 | 1 | 2064 | ... | 2 | 80 | 1 | 6 | 0 | 3 | 6 | 2 | 0 | 3 |
| 1468 | 49 | No | 1 | 1023 | 2 | 2 | 3 | 3 | 1 | 2065 | ... | 4 | 80 | 0 | 17 | 3 | 2 | 9 | 6 | 0 | 8 |
| 1469 | 34 | No | 2 | 628 | 1 | 8 | 3 | 3 | 1 | 2068 | ... | 1 | 80 | 0 | 6 | 3 | 4 | 4 | 3 | 1 | 2 |
5 rows × 35 columns
X=data.drop(columns=["EmployeeNumber","EmployeeCount","StandardHours","Attrition","Over18"],axis=1)
y=data["Attrition"]
from sklearn.preprocessing import MinMaxScaler
ms=MinMaxScaler()
X_Scaled=ms.fit_transform(X)
cor=data.corr()
<ipython-input-76-410fe4458127>:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. cor=data.corr()
fig, ax = plt.subplots(figsize=(30,30))
sns.heatmap(cor, annot=True)
<Axes: >
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(X_Scaled,y,test_size =0.2,random_state =0)
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train,y_train)
LogisticRegression(random_state=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(random_state=0)
from sklearn.metrics import accuracy_score,confusion_matrix
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)*100
[[242 3] [ 32 17]]
88.09523809523809
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report,roc_auc_score,roc_curve
print(classification_report(y_test,y_pred))
precision recall f1-score support
No 0.88 0.99 0.93 245
Yes 0.85 0.35 0.49 49
accuracy 0.88 294
macro avg 0.87 0.67 0.71 294
weighted avg 0.88 0.88 0.86 294
from sklearn.tree import DecisionTreeClassifier
dtc=DecisionTreeClassifier()
dtc.fit(x_train,y_train)
DecisionTreeClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier()
from sklearn.metrics import accuracy_score,confusion_matrix
y_pred = dtc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)*100
[[207 38] [ 33 16]]
75.85034013605441
from sklearn import tree
plt.figure(figsize=(25,15))
tree.plot_tree(dtc,filled=True)
[Text(0.32152859669811323, 0.9722222222222222, 'x[23] <= 0.038\ngini = 0.269\nsamples = 1176\nvalue = [988, 188]'), Text(0.07547169811320754, 0.9166666666666666, 'x[14] <= 0.75\ngini = 0.5\nsamples = 78\nvalue = [39, 39]'), Text(0.04716981132075472, 0.8611111111111112, 'x[4] <= 0.554\ngini = 0.426\nsamples = 39\nvalue = [27, 12]'), Text(0.031446540880503145, 0.8055555555555556, 'x[13] <= 0.167\ngini = 0.312\nsamples = 31\nvalue = [25, 6]'), Text(0.018867924528301886, 0.75, 'x[18] <= 0.5\ngini = 0.49\nsamples = 7\nvalue = [3, 4]'), Text(0.012578616352201259, 0.6944444444444444, 'x[8] <= 0.5\ngini = 0.375\nsamples = 4\nvalue = [3, 1]'), Text(0.006289308176100629, 0.6388888888888888, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.018867924528301886, 0.6388888888888888, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.025157232704402517, 0.6944444444444444, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.0440251572327044, 0.75, 'x[17] <= 0.056\ngini = 0.153\nsamples = 24\nvalue = [22, 2]'), Text(0.03773584905660377, 0.6944444444444444, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.050314465408805034, 0.6944444444444444, 'x[7] <= 0.167\ngini = 0.083\nsamples = 23\nvalue = [22, 1]'), Text(0.0440251572327044, 0.6388888888888888, 'x[20] <= 0.5\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.03773584905660377, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.050314465408805034, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.05660377358490566, 0.6388888888888888, 'gini = 0.0\nsamples = 21\nvalue = [21, 0]'), Text(0.06289308176100629, 0.8055555555555556, 'x[19] <= 0.679\ngini = 0.375\nsamples = 8\nvalue = [2, 6]'), Text(0.05660377358490566, 0.75, 'gini = 0.0\nsamples = 6\nvalue = [0, 6]'), Text(0.06918238993710692, 0.75, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.10377358490566038, 0.8611111111111112, 'x[9] <= 0.364\ngini = 0.426\nsamples = 39\nvalue = [12, 27]'), Text(0.0880503144654088, 0.8055555555555556, 'x[15] <= 0.1\ngini = 0.133\nsamples = 14\nvalue = [1, 13]'), Text(0.08176100628930817, 0.75, 'gini = 0.0\nsamples = 13\nvalue = [0, 13]'), Text(0.09433962264150944, 0.75, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.11949685534591195, 0.8055555555555556, 'x[18] <= 0.5\ngini = 0.493\nsamples = 25\nvalue = [11, 14]'), Text(0.1069182389937107, 0.75, 'x[2] <= 0.106\ngini = 0.484\nsamples = 17\nvalue = [10, 7]'), Text(0.10062893081761007, 0.6944444444444444, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.11320754716981132, 0.6944444444444444, 'x[21] <= 0.167\ngini = 0.408\nsamples = 14\nvalue = [10, 4]'), Text(0.10062893081761007, 0.6388888888888888, 'x[23] <= 0.013\ngini = 0.375\nsamples = 4\nvalue = [1, 3]'), Text(0.09433962264150944, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.1069182389937107, 0.5833333333333334, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.12578616352201258, 0.6388888888888888, 'x[3] <= 0.75\ngini = 0.18\nsamples = 10\nvalue = [9, 1]'), Text(0.11949685534591195, 0.5833333333333334, 'gini = 0.0\nsamples = 9\nvalue = [9, 0]'), Text(0.1320754716981132, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.1320754716981132, 0.75, 'x[15] <= 0.103\ngini = 0.219\nsamples = 8\nvalue = [1, 7]'), Text(0.12578616352201258, 0.6944444444444444, 'gini = 0.0\nsamples = 7\nvalue = [0, 7]'), Text(0.13836477987421383, 0.6944444444444444, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.5675854952830188, 0.9166666666666666, 'x[18] <= 0.5\ngini = 0.235\nsamples = 1098\nvalue = [949, 149]'), Text(0.31087853773584906, 0.8611111111111112, 'x[25] <= 0.167\ngini = 0.162\nsamples = 798\nvalue = [727, 71]'), Text(0.1792452830188679, 0.8055555555555556, 'x[2] <= 0.747\ngini = 0.38\nsamples = 47\nvalue = [35, 12]'), Text(0.17295597484276728, 0.75, 'x[10] <= 0.5\ngini = 0.463\nsamples = 33\nvalue = [21, 12]'), Text(0.1509433962264151, 0.6944444444444444, 'x[4] <= 0.446\ngini = 0.42\nsamples = 10\nvalue = [3, 7]'), Text(0.14465408805031446, 0.6388888888888888, 'gini = 0.0\nsamples = 6\nvalue = [0, 6]'), Text(0.15723270440251572, 0.6388888888888888, 'x[5] <= 0.125\ngini = 0.375\nsamples = 4\nvalue = [3, 1]'), Text(0.1509433962264151, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.16352201257861634, 0.5833333333333334, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.1949685534591195, 0.6944444444444444, 'x[24] <= 0.583\ngini = 0.34\nsamples = 23\nvalue = [18, 5]'), Text(0.18238993710691823, 0.6388888888888888, 'x[28] <= 0.333\ngini = 0.117\nsamples = 16\nvalue = [15, 1]'), Text(0.1761006289308176, 0.5833333333333334, 'gini = 0.0\nsamples = 15\nvalue = [15, 0]'), Text(0.18867924528301888, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.20754716981132076, 0.6388888888888888, 'x[26] <= 0.287\ngini = 0.49\nsamples = 7\nvalue = [3, 4]'), Text(0.20125786163522014, 0.5833333333333334, 'gini = 0.0\nsamples = 4\nvalue = [0, 4]'), Text(0.2138364779874214, 0.5833333333333334, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.18553459119496854, 0.75, 'gini = 0.0\nsamples = 14\nvalue = [14, 0]'), Text(0.4425117924528302, 0.8055555555555556, 'x[23] <= 0.975\ngini = 0.145\nsamples = 751\nvalue = [692, 59]'), Text(0.43622248427672955, 0.75, 'x[26] <= 0.113\ngini = 0.143\nsamples = 750\nvalue = [692, 58]'), Text(0.3018867924528302, 0.6944444444444444, 'x[7] <= 0.167\ngini = 0.218\nsamples = 257\nvalue = [225, 32]'), Text(0.2468553459119497, 0.6388888888888888, 'x[29] <= 0.147\ngini = 0.355\nsamples = 65\nvalue = [50, 15]'), Text(0.22641509433962265, 0.5833333333333334, 'x[29] <= 0.029\ngini = 0.303\nsamples = 59\nvalue = [48, 11]'), Text(0.20440251572327045, 0.5277777777777778, 'x[10] <= 0.5\ngini = 0.463\nsamples = 22\nvalue = [14, 8]'), Text(0.1918238993710692, 0.4722222222222222, 'x[9] <= 0.179\ngini = 0.198\nsamples = 9\nvalue = [8, 1]'), Text(0.18553459119496854, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.19811320754716982, 0.4166666666666667, 'gini = 0.0\nsamples = 8\nvalue = [8, 0]'), Text(0.2169811320754717, 0.4722222222222222, 'x[9] <= 0.4\ngini = 0.497\nsamples = 13\nvalue = [6, 7]'), Text(0.21069182389937108, 0.4166666666666667, 'gini = 0.0\nsamples = 4\nvalue = [4, 0]'), Text(0.22327044025157233, 0.4166666666666667, 'x[4] <= 0.286\ngini = 0.346\nsamples = 9\nvalue = [2, 7]'), Text(0.2169811320754717, 0.3611111111111111, 'x[28] <= 0.1\ngini = 0.444\nsamples = 3\nvalue = [2, 1]'), Text(0.21069182389937108, 0.3055555555555556, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.22327044025157233, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.22955974842767296, 0.3611111111111111, 'gini = 0.0\nsamples = 6\nvalue = [0, 6]'), Text(0.24842767295597484, 0.5277777777777778, 'x[13] <= 0.167\ngini = 0.149\nsamples = 37\nvalue = [34, 3]'), Text(0.24213836477987422, 0.4722222222222222, 'x[26] <= 0.088\ngini = 0.5\nsamples = 6\nvalue = [3, 3]'), Text(0.2358490566037736, 0.4166666666666667, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.24842767295597484, 0.4166666666666667, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.25471698113207547, 0.4722222222222222, 'gini = 0.0\nsamples = 31\nvalue = [31, 0]'), Text(0.2672955974842767, 0.5833333333333334, 'x[10] <= 0.167\ngini = 0.444\nsamples = 6\nvalue = [2, 4]'), Text(0.2610062893081761, 0.5277777777777778, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.27358490566037735, 0.5277777777777778, 'x[20] <= 0.5\ngini = 0.444\nsamples = 3\nvalue = [2, 1]'), Text(0.2672955974842767, 0.4722222222222222, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.279874213836478, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.35691823899371067, 0.6388888888888888, 'x[0] <= 0.321\ngini = 0.161\nsamples = 192\nvalue = [175, 17]'), Text(0.3081761006289308, 0.5833333333333334, 'x[6] <= 0.1\ngini = 0.294\nsamples = 67\nvalue = [55, 12]'), Text(0.3018867924528302, 0.5277777777777778, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.31446540880503143, 0.5277777777777778, 'x[25] <= 0.5\ngini = 0.26\nsamples = 65\nvalue = [55, 10]'), Text(0.29245283018867924, 0.4722222222222222, 'x[6] <= 0.5\ngini = 0.469\nsamples = 16\nvalue = [10, 6]'), Text(0.2861635220125786, 0.4166666666666667, 'gini = 0.0\nsamples = 7\nvalue = [7, 0]'), Text(0.29874213836477986, 0.4166666666666667, 'x[7] <= 0.833\ngini = 0.444\nsamples = 9\nvalue = [3, 6]'), Text(0.29245283018867924, 0.3611111111111111, 'gini = 0.0\nsamples = 5\nvalue = [0, 5]'), Text(0.3050314465408805, 0.3611111111111111, 'x[16] <= 0.072\ngini = 0.375\nsamples = 4\nvalue = [3, 1]'), Text(0.29874213836477986, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.3113207547169811, 0.3055555555555556, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.33647798742138363, 0.4722222222222222, 'x[2] <= 0.037\ngini = 0.15\nsamples = 49\nvalue = [45, 4]'), Text(0.330188679245283, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.34276729559748426, 0.4166666666666667, 'x[2] <= 0.938\ngini = 0.117\nsamples = 48\nvalue = [45, 3]'), Text(0.33647798742138363, 0.3611111111111111, 'x[5] <= 0.875\ngini = 0.081\nsamples = 47\nvalue = [45, 2]'), Text(0.3238993710691824, 0.3055555555555556, 'x[10] <= 0.167\ngini = 0.043\nsamples = 45\nvalue = [44, 1]'), Text(0.31761006289308175, 0.25, 'x[13] <= 0.5\ngini = 0.444\nsamples = 3\nvalue = [2, 1]'), Text(0.3113207547169811, 0.19444444444444445, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.3238993710691824, 0.19444444444444445, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.330188679245283, 0.25, 'gini = 0.0\nsamples = 42\nvalue = [42, 0]'), Text(0.3490566037735849, 0.3055555555555556, 'x[29] <= 0.088\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.34276729559748426, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.3553459119496855, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.3490566037735849, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.4056603773584906, 0.5833333333333334, 'x[6] <= 0.9\ngini = 0.077\nsamples = 125\nvalue = [120, 5]'), Text(0.39308176100628933, 0.5277777777777778, 'x[0] <= 0.393\ngini = 0.05\nsamples = 118\nvalue = [115, 3]'), Text(0.3867924528301887, 0.4722222222222222, 'x[2] <= 0.956\ngini = 0.185\nsamples = 29\nvalue = [26, 3]'), Text(0.3805031446540881, 0.4166666666666667, 'x[10] <= 0.167\ngini = 0.133\nsamples = 28\nvalue = [26, 2]'), Text(0.36792452830188677, 0.3611111111111111, 'x[2] <= 0.216\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.36163522012578614, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.3742138364779874, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.39308176100628933, 0.3611111111111111, 'x[29] <= 0.147\ngini = 0.074\nsamples = 26\nvalue = [25, 1]'), Text(0.3867924528301887, 0.3055555555555556, 'gini = 0.0\nsamples = 24\nvalue = [24, 0]'), Text(0.39937106918238996, 0.3055555555555556, 'x[22] <= 0.333\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.39308176100628933, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.4056603773584906, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.39308176100628933, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.39937106918238996, 0.4722222222222222, 'gini = 0.0\nsamples = 89\nvalue = [89, 0]'), Text(0.41823899371069184, 0.5277777777777778, 'x[9] <= 0.514\ngini = 0.408\nsamples = 7\nvalue = [5, 2]'), Text(0.4119496855345912, 0.4722222222222222, 'x[29] <= 0.059\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.4056603773584906, 0.4166666666666667, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.41823899371069184, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.42452830188679247, 0.4722222222222222, 'gini = 0.0\nsamples = 4\nvalue = [4, 0]'), Text(0.570558176100629, 0.6944444444444444, 'x[26] <= 0.787\ngini = 0.1\nsamples = 493\nvalue = [467, 26]'), Text(0.5373427672955975, 0.6388888888888888, 'x[13] <= 0.5\ngini = 0.094\nsamples = 486\nvalue = [462, 24]'), Text(0.48977987421383645, 0.5833333333333334, 'x[12] <= 0.938\ngini = 0.154\nsamples = 191\nvalue = [175, 16]'), Text(0.4834905660377358, 0.5277777777777778, 'x[16] <= 0.481\ngini = 0.145\nsamples = 190\nvalue = [175, 15]'), Text(0.4638364779874214, 0.4722222222222222, 'x[29] <= 0.794\ngini = 0.221\nsamples = 95\nvalue = [83, 12]'), Text(0.45754716981132076, 0.4166666666666667, 'x[16] <= 0.47\ngini = 0.207\nsamples = 94\nvalue = [83, 11]'), Text(0.45125786163522014, 0.3611111111111111, 'x[5] <= 0.375\ngini = 0.192\nsamples = 93\nvalue = [83, 10]'), Text(0.42452830188679247, 0.3055555555555556, 'x[6] <= 0.9\ngini = 0.363\nsamples = 21\nvalue = [16, 5]'), Text(0.41823899371069184, 0.25, 'x[15] <= 0.413\ngini = 0.266\nsamples = 19\nvalue = [16, 3]'), Text(0.4056603773584906, 0.19444444444444445, 'x[4] <= 0.982\ngini = 0.117\nsamples = 16\nvalue = [15, 1]'), Text(0.39937106918238996, 0.1388888888888889, 'gini = 0.0\nsamples = 14\nvalue = [14, 0]'), Text(0.4119496855345912, 0.1388888888888889, 'x[19] <= 0.071\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.4056603773584906, 0.08333333333333333, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.41823899371069184, 0.08333333333333333, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.4308176100628931, 0.19444444444444445, 'x[19] <= 0.893\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.42452830188679247, 0.1388888888888889, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.4371069182389937, 0.1388888888888889, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.4308176100628931, 0.25, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.4779874213836478, 0.3055555555555556, 'x[27] <= 0.139\ngini = 0.129\nsamples = 72\nvalue = [67, 5]'), Text(0.46226415094339623, 0.25, 'x[15] <= 0.244\ngini = 0.444\nsamples = 6\nvalue = [4, 2]'), Text(0.4559748427672956, 0.19444444444444445, 'x[17] <= 0.278\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.449685534591195, 0.1388888888888889, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.46226415094339623, 0.1388888888888889, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.46855345911949686, 0.19444444444444445, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.4937106918238994, 0.25, 'x[9] <= 0.993\ngini = 0.087\nsamples = 66\nvalue = [63, 3]'), Text(0.4811320754716981, 0.19444444444444445, 'x[24] <= 0.583\ngini = 0.061\nsamples = 64\nvalue = [62, 2]'), Text(0.4748427672955975, 0.1388888888888889, 'gini = 0.0\nsamples = 51\nvalue = [51, 0]'), Text(0.48742138364779874, 0.1388888888888889, 'x[12] <= 0.812\ngini = 0.26\nsamples = 13\nvalue = [11, 2]'), Text(0.4811320754716981, 0.08333333333333333, 'gini = 0.0\nsamples = 9\nvalue = [9, 0]'), Text(0.4937106918238994, 0.08333333333333333, 'x[7] <= 0.5\ngini = 0.5\nsamples = 4\nvalue = [2, 2]'), Text(0.48742138364779874, 0.027777777777777776, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.5, 0.027777777777777776, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.5062893081761006, 0.19444444444444445, 'x[26] <= 0.263\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.5, 0.1388888888888889, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5125786163522013, 0.1388888888888889, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.4638364779874214, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.470125786163522, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5031446540880503, 0.4722222222222222, 'x[17] <= 0.5\ngini = 0.061\nsamples = 95\nvalue = [92, 3]'), Text(0.4968553459119497, 0.4166666666666667, 'gini = 0.0\nsamples = 76\nvalue = [76, 0]'), Text(0.5094339622641509, 0.4166666666666667, 'x[29] <= 0.088\ngini = 0.266\nsamples = 19\nvalue = [16, 3]'), Text(0.4968553459119497, 0.3611111111111111, 'x[2] <= 0.547\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.49056603773584906, 0.3055555555555556, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.5031446540880503, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.5220125786163522, 0.3611111111111111, 'x[15] <= 0.108\ngini = 0.117\nsamples = 16\nvalue = [15, 1]'), Text(0.5157232704402516, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5283018867924528, 0.3055555555555556, 'gini = 0.0\nsamples = 15\nvalue = [15, 0]'), Text(0.4960691823899371, 0.5277777777777778, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5849056603773585, 0.5833333333333334, 'x[19] <= 0.036\ngini = 0.053\nsamples = 295\nvalue = [287, 8]'), Text(0.5628930817610063, 0.5277777777777778, 'x[28] <= 0.7\ngini = 0.159\nsamples = 46\nvalue = [42, 4]'), Text(0.5566037735849056, 0.4722222222222222, 'x[9] <= 0.071\ngini = 0.124\nsamples = 45\nvalue = [42, 3]'), Text(0.5408805031446541, 0.4166666666666667, 'x[5] <= 0.5\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.5345911949685535, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.5471698113207547, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5723270440251572, 0.4166666666666667, 'x[23] <= 0.688\ngini = 0.089\nsamples = 43\nvalue = [41, 2]'), Text(0.559748427672956, 0.3611111111111111, 'x[12] <= 0.062\ngini = 0.048\nsamples = 41\nvalue = [40, 1]'), Text(0.5534591194968553, 0.3055555555555556, 'x[2] <= 0.487\ngini = 0.375\nsamples = 4\nvalue = [3, 1]'), Text(0.5471698113207547, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.559748427672956, 0.25, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.5660377358490566, 0.3055555555555556, 'gini = 0.0\nsamples = 37\nvalue = [37, 0]'), Text(0.5849056603773585, 0.3611111111111111, 'x[17] <= 0.611\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.5786163522012578, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.5911949685534591, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.5691823899371069, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6069182389937107, 0.5277777777777778, 'x[15] <= 0.056\ngini = 0.032\nsamples = 249\nvalue = [245, 4]'), Text(0.5911949685534591, 0.4722222222222222, 'x[14] <= 0.75\ngini = 0.32\nsamples = 5\nvalue = [4, 1]'), Text(0.5849056603773585, 0.4166666666666667, 'gini = 0.0\nsamples = 4\nvalue = [4, 0]'), Text(0.5974842767295597, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6226415094339622, 0.4722222222222222, 'x[2] <= 0.015\ngini = 0.024\nsamples = 244\nvalue = [241, 3]'), Text(0.610062893081761, 0.4166666666666667, 'x[2] <= 0.013\ngini = 0.278\nsamples = 6\nvalue = [5, 1]'), Text(0.6037735849056604, 0.3611111111111111, 'gini = 0.0\nsamples = 5\nvalue = [5, 0]'), Text(0.6163522012578616, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6352201257861635, 0.4166666666666667, 'x[21] <= 0.167\ngini = 0.017\nsamples = 238\nvalue = [236, 2]'), Text(0.6289308176100629, 0.3611111111111111, 'x[25] <= 0.833\ngini = 0.073\nsamples = 53\nvalue = [51, 2]'), Text(0.6163522012578616, 0.3055555555555556, 'x[29] <= 0.088\ngini = 0.041\nsamples = 48\nvalue = [47, 1]'), Text(0.610062893081761, 0.25, 'x[16] <= 0.824\ngini = 0.245\nsamples = 7\nvalue = [6, 1]'), Text(0.6037735849056604, 0.19444444444444445, 'gini = 0.0\nsamples = 6\nvalue = [6, 0]'), Text(0.6163522012578616, 0.19444444444444445, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6226415094339622, 0.25, 'gini = 0.0\nsamples = 41\nvalue = [41, 0]'), Text(0.6415094339622641, 0.3055555555555556, 'x[28] <= 0.367\ngini = 0.32\nsamples = 5\nvalue = [4, 1]'), Text(0.6352201257861635, 0.25, 'gini = 0.0\nsamples = 4\nvalue = [4, 0]'), Text(0.6477987421383647, 0.25, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6415094339622641, 0.3611111111111111, 'gini = 0.0\nsamples = 185\nvalue = [185, 0]'), Text(0.6037735849056604, 0.6388888888888888, 'x[8] <= 0.5\ngini = 0.408\nsamples = 7\nvalue = [5, 2]'), Text(0.5974842767295597, 0.5833333333333334, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.610062893081761, 0.5833333333333334, 'gini = 0.0\nsamples = 5\nvalue = [5, 0]'), Text(0.4488011006289308, 0.75, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.8242924528301887, 0.8611111111111112, 'x[15] <= 0.157\ngini = 0.385\nsamples = 300\nvalue = [222, 78]'), Text(0.7382075471698113, 0.8055555555555556, 'x[22] <= 0.167\ngini = 0.5\nsamples = 96\nvalue = [49, 47]'), Text(0.7044025157232704, 0.75, 'x[4] <= 0.161\ngini = 0.459\nsamples = 42\nvalue = [15, 27]'), Text(0.6792452830188679, 0.6944444444444444, 'x[16] <= 0.41\ngini = 0.499\nsamples = 23\nvalue = [12, 11]'), Text(0.660377358490566, 0.6388888888888888, 'x[15] <= 0.061\ngini = 0.426\nsamples = 13\nvalue = [4, 9]'), Text(0.6540880503144654, 0.5833333333333334, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.6666666666666666, 0.5833333333333334, 'x[9] <= 0.993\ngini = 0.298\nsamples = 11\nvalue = [2, 9]'), Text(0.660377358490566, 0.5277777777777778, 'x[0] <= 0.214\ngini = 0.18\nsamples = 10\nvalue = [1, 9]'), Text(0.6540880503144654, 0.4722222222222222, 'x[2] <= 0.406\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.6477987421383647, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.660377358490566, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.6666666666666666, 0.4722222222222222, 'gini = 0.0\nsamples = 8\nvalue = [0, 8]'), Text(0.6729559748427673, 0.5277777777777778, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.6981132075471698, 0.6388888888888888, 'x[7] <= 0.167\ngini = 0.32\nsamples = 10\nvalue = [8, 2]'), Text(0.6918238993710691, 0.5833333333333334, 'x[2] <= 0.544\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.6855345911949685, 0.5277777777777778, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.6981132075471698, 0.5277777777777778, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.7044025157232704, 0.5833333333333334, 'gini = 0.0\nsamples = 7\nvalue = [7, 0]'), Text(0.7295597484276729, 0.6944444444444444, 'x[11] <= 0.125\ngini = 0.266\nsamples = 19\nvalue = [3, 16]'), Text(0.7232704402515723, 0.6388888888888888, 'x[9] <= 0.2\ngini = 0.198\nsamples = 18\nvalue = [2, 16]'), Text(0.7169811320754716, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.7295597484276729, 0.5833333333333334, 'x[28] <= 0.433\ngini = 0.111\nsamples = 17\nvalue = [1, 16]'), Text(0.7232704402515723, 0.5277777777777778, 'gini = 0.0\nsamples = 15\nvalue = [0, 15]'), Text(0.7358490566037735, 0.5277777777777778, 'x[29] <= 0.235\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.7295597484276729, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.7421383647798742, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.7358490566037735, 0.6388888888888888, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.7720125786163522, 0.75, 'x[0] <= 0.202\ngini = 0.466\nsamples = 54\nvalue = [34, 20]'), Text(0.7547169811320755, 0.6944444444444444, 'x[0] <= 0.107\ngini = 0.245\nsamples = 7\nvalue = [1, 6]'), Text(0.7484276729559748, 0.6388888888888888, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.7610062893081762, 0.6388888888888888, 'gini = 0.0\nsamples = 6\nvalue = [0, 6]'), Text(0.789308176100629, 0.6944444444444444, 'x[2] <= 0.622\ngini = 0.418\nsamples = 47\nvalue = [33, 14]'), Text(0.7735849056603774, 0.6388888888888888, 'x[2] <= 0.145\ngini = 0.482\nsamples = 32\nvalue = [19, 13]'), Text(0.7610062893081762, 0.5833333333333334, 'x[26] <= 0.237\ngini = 0.18\nsamples = 10\nvalue = [9, 1]'), Text(0.7547169811320755, 0.5277777777777778, 'gini = 0.0\nsamples = 9\nvalue = [9, 0]'), Text(0.7672955974842768, 0.5277777777777778, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.7861635220125787, 0.5833333333333334, 'x[16] <= 0.87\ngini = 0.496\nsamples = 22\nvalue = [10, 12]'), Text(0.779874213836478, 0.5277777777777778, 'x[25] <= 0.833\ngini = 0.465\nsamples = 19\nvalue = [7, 12]'), Text(0.7735849056603774, 0.4722222222222222, 'x[17] <= 0.167\ngini = 0.415\nsamples = 17\nvalue = [5, 12]'), Text(0.7610062893081762, 0.4166666666666667, 'x[19] <= 0.321\ngini = 0.49\nsamples = 7\nvalue = [4, 3]'), Text(0.7547169811320755, 0.3611111111111111, 'gini = 0.0\nsamples = 4\nvalue = [4, 0]'), Text(0.7672955974842768, 0.3611111111111111, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.7861635220125787, 0.4166666666666667, 'x[12] <= 0.188\ngini = 0.18\nsamples = 10\nvalue = [1, 9]'), Text(0.779874213836478, 0.3611111111111111, 'x[19] <= 0.393\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.7735849056603774, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.7861635220125787, 0.3055555555555556, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.7924528301886793, 0.3611111111111111, 'gini = 0.0\nsamples = 8\nvalue = [0, 8]'), Text(0.7861635220125787, 0.4722222222222222, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.7924528301886793, 0.5277777777777778, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.8050314465408805, 0.6388888888888888, 'x[9] <= 0.064\ngini = 0.124\nsamples = 15\nvalue = [14, 1]'), Text(0.7987421383647799, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.8113207547169812, 0.5833333333333334, 'gini = 0.0\nsamples = 14\nvalue = [14, 0]'), Text(0.910377358490566, 0.8055555555555556, 'x[14] <= 0.75\ngini = 0.258\nsamples = 204\nvalue = [173, 31]'), Text(0.8553459119496856, 0.75, 'x[15] <= 0.992\ngini = 0.138\nsamples = 147\nvalue = [136, 11]'), Text(0.8490566037735849, 0.6944444444444444, 'x[4] <= 0.482\ngini = 0.128\nsamples = 146\nvalue = [136, 10]'), Text(0.8301886792452831, 0.6388888888888888, 'x[26] <= 0.063\ngini = 0.038\nsamples = 104\nvalue = [102, 2]'), Text(0.8238993710691824, 0.5833333333333334, 'x[9] <= 0.193\ngini = 0.32\nsamples = 10\nvalue = [8, 2]'), Text(0.8176100628930818, 0.5277777777777778, 'x[15] <= 0.599\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.8113207547169812, 0.4722222222222222, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.8238993710691824, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.8301886792452831, 0.5277777777777778, 'gini = 0.0\nsamples = 7\nvalue = [7, 0]'), Text(0.8364779874213837, 0.5833333333333334, 'gini = 0.0\nsamples = 94\nvalue = [94, 0]'), Text(0.8679245283018868, 0.6388888888888888, 'x[7] <= 0.167\ngini = 0.308\nsamples = 42\nvalue = [34, 8]'), Text(0.8490566037735849, 0.5833333333333334, 'x[2] <= 0.736\ngini = 0.375\nsamples = 4\nvalue = [1, 3]'), Text(0.8427672955974843, 0.5277777777777778, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.8553459119496856, 0.5277777777777778, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.8867924528301887, 0.5833333333333334, 'x[0] <= 0.393\ngini = 0.229\nsamples = 38\nvalue = [33, 5]'), Text(0.8679245283018868, 0.5277777777777778, 'x[1] <= 0.25\ngini = 0.5\nsamples = 6\nvalue = [3, 3]'), Text(0.8616352201257862, 0.4722222222222222, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.8742138364779874, 0.4722222222222222, 'x[5] <= 0.625\ngini = 0.375\nsamples = 4\nvalue = [1, 3]'), Text(0.8679245283018868, 0.4166666666666667, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.8805031446540881, 0.4166666666666667, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.9056603773584906, 0.5277777777777778, 'x[24] <= 0.917\ngini = 0.117\nsamples = 32\nvalue = [30, 2]'), Text(0.89937106918239, 0.4722222222222222, 'x[12] <= 0.812\ngini = 0.062\nsamples = 31\nvalue = [30, 1]'), Text(0.8930817610062893, 0.4166666666666667, 'gini = 0.0\nsamples = 28\nvalue = [28, 0]'), Text(0.9056603773584906, 0.4166666666666667, 'x[0] <= 0.69\ngini = 0.444\nsamples = 3\nvalue = [2, 1]'), Text(0.89937106918239, 0.3611111111111111, 'gini = 0.0\nsamples = 2\nvalue = [2, 0]'), Text(0.9119496855345912, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.9119496855345912, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.8616352201257862, 0.6944444444444444, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.9654088050314465, 0.75, 'x[12] <= 0.812\ngini = 0.456\nsamples = 57\nvalue = [37, 20]'), Text(0.9433962264150944, 0.6944444444444444, 'x[28] <= 0.4\ngini = 0.238\nsamples = 29\nvalue = [25, 4]'), Text(0.9308176100628931, 0.6388888888888888, 'x[9] <= 0.964\ngini = 0.142\nsamples = 26\nvalue = [24, 2]'), Text(0.9245283018867925, 0.5833333333333334, 'x[20] <= 0.5\ngini = 0.077\nsamples = 25\nvalue = [24, 1]'), Text(0.9182389937106918, 0.5277777777777778, 'gini = 0.0\nsamples = 23\nvalue = [23, 0]'), Text(0.9308176100628931, 0.5277777777777778, 'x[12] <= 0.375\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'), Text(0.9245283018867925, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.9371069182389937, 0.4722222222222222, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.9371069182389937, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.9559748427672956, 0.6388888888888888, 'x[28] <= 0.933\ngini = 0.444\nsamples = 3\nvalue = [1, 2]'), Text(0.949685534591195, 0.5833333333333334, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.9622641509433962, 0.5833333333333334, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'), Text(0.9874213836477987, 0.6944444444444444, 'x[28] <= 0.1\ngini = 0.49\nsamples = 28\nvalue = [12, 16]'), Text(0.9811320754716981, 0.6388888888888888, 'x[4] <= 0.804\ngini = 0.48\nsamples = 20\nvalue = [12, 8]'), Text(0.9748427672955975, 0.5833333333333334, 'x[26] <= 0.013\ngini = 0.415\nsamples = 17\nvalue = [12, 5]'), Text(0.9685534591194969, 0.5277777777777778, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.9811320754716981, 0.5277777777777778, 'x[21] <= 0.5\ngini = 0.32\nsamples = 15\nvalue = [12, 3]'), Text(0.9748427672955975, 0.4722222222222222, 'x[7] <= 0.167\ngini = 0.5\nsamples = 6\nvalue = [3, 3]'), Text(0.9685534591194969, 0.4166666666666667, 'gini = 0.0\nsamples = 2\nvalue = [0, 2]'), Text(0.9811320754716981, 0.4166666666666667, 'x[24] <= 0.083\ngini = 0.375\nsamples = 4\nvalue = [3, 1]'), Text(0.9748427672955975, 0.3611111111111111, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'), Text(0.9874213836477987, 0.3611111111111111, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'), Text(0.9874213836477987, 0.4722222222222222, 'gini = 0.0\nsamples = 9\nvalue = [9, 0]'), Text(0.9874213836477987, 0.5833333333333334, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]'), Text(0.9937106918238994, 0.6388888888888888, 'gini = 0.0\nsamples = 8\nvalue = [0, 8]')]
from sklearn.model_selection import GridSearchCV
parameter={
'criterion':['gini','entropy'],
'splitter':['best','random'],
'max_depth':[1,2,3,4,5,6,7,8,9,10],
'max_features':['sqrt']
}
grid_search=GridSearchCV(estimator=dtc,param_grid=parameter,cv=5,scoring="accuracy")
grid_search.fit(x_train,y_train)
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'max_features': ['sqrt'],
'splitter': ['best', 'random']},
scoring='accuracy')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=5, estimator=DecisionTreeClassifier(),
param_grid={'criterion': ['gini', 'entropy'],
'max_depth': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
'max_features': ['sqrt'],
'splitter': ['best', 'random']},
scoring='accuracy')DecisionTreeClassifier()
DecisionTreeClassifier()
grid_search.best_params_
{'criterion': 'entropy',
'max_depth': 5,
'max_features': 'auto',
'splitter': 'random'}
dtc_cv=DecisionTreeClassifier(criterion= 'entropy',max_depth= 4,max_features= 'sqrt',splitter= 'best')
dtc_cv.fit(x_train,y_train)
DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(criterion='entropy', max_depth=4, max_features='sqrt')
print(classification_report(y_test,y_pred))
precision recall f1-score support
No 0.86 0.84 0.85 245
Yes 0.30 0.33 0.31 49
accuracy 0.76 294
macro avg 0.58 0.59 0.58 294
weighted avg 0.77 0.76 0.76 294
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 1000, criterion = 'entropy', random_state = 0)
classifier.fit(x_train, y_train)
RandomForestClassifier(criterion='entropy', n_estimators=1000, random_state=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestClassifier(criterion='entropy', n_estimators=1000, random_state=0)
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)
[[243 2] [ 41 8]]
0.8537414965986394
from sklearn.ensemble import RandomForestClassifier
rfc=RandomForestClassifier()
forest_params = [{'max_depth': list(range(10, 15)),'max_features': ['sqrt', 'log2', 1, 0.5, None]}]
rfc_cv=GridSearchCV(rfc,param_grid=forest_params,cv=10,scoring="accuracy")
rfc_cv.fit(x_train,y_train)
GridSearchCV(cv=10, estimator=RandomForestClassifier(),
param_grid=[{'max_depth': [10, 11, 12, 13, 14],
'max_features': ['sqrt', 'log2', 1, 0.5, None]}],
scoring='accuracy')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=10, estimator=RandomForestClassifier(),
param_grid=[{'max_depth': [10, 11, 12, 13, 14],
'max_features': ['sqrt', 'log2', 1, 0.5, None]}],
scoring='accuracy')RandomForestClassifier()
RandomForestClassifier()
print(classification_report(y_test,y_pred))
precision recall f1-score support
No 0.86 0.99 0.92 245
Yes 0.80 0.16 0.27 49
accuracy 0.85 294
macro avg 0.83 0.58 0.59 294
weighted avg 0.85 0.85 0.81 294